#
# analyzeExperiment.R
#
# Analyze experimental results from the Hill Kousser 
# 2014 field experiment around the June primary 
# election.
#
# Seth Hill and Thad Kousser, September, 2015.
#

rm(list=ls())
if (.Platform$OS == "windows") {
  # Set working directory in location of script if Windows. On unix machine, not necessary..
  f_f <- lapply(sys.frames(),function(x) x$ofile);f_f <- Filter(Negate(is.null),f_f) ; PTH <- dirname(f_f[[length(f_f)]]); setwd(PTH) ; rm(PTH,f_f)
  try(.doit(),silent=T)
}
options(stringsAsFactors=F)
require(data.table)
require(bit64)
require(car)
require(apsrtable)
require(tree)
source("09_analysisFunctions.R") # Functions.

# Parameters.
blocking.vars <- c('age.bin','party.bin','in.toss.up.dist','minority.dist','vote.10.gen','vote.08.gen')
small <- NULL#500e3# # run analysis only on this number of cases when not NULL.
# Which election to analyze, primary or general.
dv <- "vote.14.gen"# "vote.14.gen.pri"#"vote.14.gen"# 

cat("\n===================\nAnalyzing treatment effect for election",dv,"\n===================\n");flush.console()

#
# Call in data.
#  
cat("\nLoading in merged pre- and post-election data at",paste(Sys.time()),"...\n")
regs <- fread("analysisFile.csv")

if (!is.null(small)) {
  cat("\n==============\nRunning analysis on sample of",small,"cases...\n==============\n")
  regs <- regs[sample(seq_len(nrow(regs)),small),]
  flush.console()
}

if (dv == "vote.14.gen") {
  cat("Switching variable `yvar' to 2014 general turnout.\n")
  regs[,vote.14.gen.pri := yvar]
  regs[,yvar := vote.14.gen]
}

#
# Code variables.
#
regs[,party := recode(party.bin,"1='REP';2='DEM';3='NPP';4='OTH'")]
regs[,any.letter := ifelse(treatment.assign != "Control","Any letter","Control")]
regs[,treat.elec.info := 1*(treatment.assign == "Election info")]
regs[,treat.toptwo.info := 1*(treatment.assign == "Top-two info")]
regs[,treat.partisan := 1*(treatment.assign == "Partisan")]

#
# Tables.
#

# Table 1. Turnout by condition.
tab.1 <- makeStats(regs) # all participants.
# Merge in same stats for non-partisans, Dems, Reps, Other party.
tab.1[,party := "All"]
for (pty in c("NPP","DEM","REP","OTH")) {
  tmp <- makeStats(regs[party == pty,])
  tmp[,party := pty]
  tab.1 <- rbindlist(list(tab.1,tmp))
}
# Add records for "any-letter" treatment.
tmp <- makeStats(regs,by.var="any.letter")
tmp[,party := "All"]
tab.1 <- rbindlist(list(tab.1,tmp[any.letter != "Control",]))
for (pty in c("NPP","DEM","REP","OTH")) {
  tmp <- makeStats(regs[party == pty,],by.var="any.letter")
  tmp[,party := pty]
  tab.1 <- rbindlist(list(tab.1,tmp[any.letter != "Control",]))
}

#
# Print out table 1. Use makeRow2 to include OTH.
#
cat("Turnout by condition and party, all cases:\n")
table.1 <- c("\\\\","\\hline","        & All & NPP & DEM & REP & OTH \\\\","\\hline")
for (cond in c("Control","Any letter","Election info","Top-two info","Partisan")) {
  table.1 <- c(table.1,makeRow2(tab.1,cond))
}
cat(paste(table.1,collapse="\n"));flush.console()
write(paste(table.1,collapse="\n"),
      file=ifelse(dv=="vote.14.gen.pri","tablesAndFigs/Table1.tex","tablesAndFigs/14Gen-Table1.tex"))
# Formal tests on diffs of means.
# Create data.table for each test.
pr.tests <- data.table(CJ(party=c("NPP","DEM","REP","OTH"),cond1=c("Election info","Top-two info"),
                          cond2=c("Top-two info","Partisan")))
pr.tests <- pr.tests[cond1 != cond2,] # drop duplicates
# Calculate p-vals.
returnP <- function(pty,c1,c2,regs) {
  res <- regs[party == pty & treatment.assign %in% c(c1,c2),prop.test(table(treatment.assign,yvar))]
  res$p.value
}
pr.tests[, p.val := apply(pr.tests,1,function(x) returnP(pty=x['party'],c1=x['cond1'],c2=x['cond2'],regs=regs))]
cat("Calculated p-values for difference of proportion tests on party treatment heterogeneity.\n")
txt <- sprintf("Of %s two-way difference of proportion tests comparing each individual letter to each other for each partisan group, %s are significant at $p<.05$, two-tailed.%s\n", 
nrow(pr.tests),pr.tests[,sum(p.val < .05)], 
ifelse(pr.tests[,sum(p.val < .05)] == 0,"", # if any pass first test, print out comparisons that pass Bonferroni.
sprintf(" Applying a Bonferroni correction for the number of tests, %s are significant at $p<%1.4f$, two-tailed %s.",pr.tests[,sum(p.val < (.05/.N))],pr.tests[,(.05/.N)],
ifelse(pr.tests[,sum(p.val < (.05)/.N)] == 0,"", paste(pr.tests[p.val < (.05/.N),sprintf("%s %s vs %s",party,cond1,cond2)],collapse=", ")))))
cat(txt)
write(txt,file=ifelse(dv=="vote.14.gen.pri","tablesAndFigs/Table1Note.txt","tablesAndFigs/14Gen-Table1Note.txt"))
print(pr.tests)

#
# Table S2. OLS estimate of treatment effects.
#
# Variables needed.
regs[,any.letter := 1*(treatment.assign != "Control")]
regs[,age.na.too.big := 1*(is.na(age.in.14) | age.in.14 > 90 | age.in.14 < 18)]
print(regs[,table(age.in.14,age.na.too.big,exclude=NULL)])
# Mean-deviate.
regs[age.na.too.big == 0,age.mean.dev := age.in.14 - mean(age.in.14)]
regs[,party.npp := 1*(party == "NPP")]
regs[,party.rep := 1*(party == "REP")]
regs[,party.dem := 1*(party == "DEM")]
regs[,party.oth := 1*(party == "OTH")]
regs[,no.vote.08.10 := 1*(vote.08.gen == 0 & vote.10.gen == 0)]

# Calculate the within estimator by subtracting off block means.
regs.within <- makeWithin(regs)
# Do so only for those aged 24+ who were eligible to
# vote in all primaries in selection criteria.
regs.within2 <- makeWithin(regs[age.in.14 > 23,])

# Models.
all.no.fes <- regs[,lm(I(100*yvar) ~ treat.elec.info + treat.toptwo.info + treat.partisan)]
# Full within FE model.
all.wi.fes <- regs.within[,lm(I(100*yvar) ~ -1 + treat.elec.info + treat.toptwo.info + treat.partisan)]
# Full within FE model, age 24+.
all.wi.fes2 <- regs.within2[,lm(I(100*yvar) ~ -1 + treat.elec.info + treat.toptwo.info + treat.partisan)]
# Only those sent letters.
trt.no.fes <- regs[treatment.assign != "Control",lm(I(100*yvar) ~ treat.toptwo.info + treat.partisan)]
# Only those sent letters within FE model.
trts.within <- makeWithin(regs[treatment.assign != "Control",])
trt.wi.fes <- trts.within[,lm(I(100*yvar) ~ -1 + treat.toptwo.info + treat.partisan)]

print(fred <- apsrtable(all.no.fes,all.wi.fes,all.wi.fes2,trt.no.fes,trt.wi.fes,
                Sweave=T,omitcoef=expression(grep("block",coefnames)),digits=2,
                model.names=c("(All, no FEs)","(All, w FEs)","(Age 24+, w FEs)","(Letters, no FEs)","(Letters, w FEs)"),
                coef.names=c("Intercept","Treatment: Election info","Treatment: Top-two info","Treatment: Partisan")
                ));flush.console()
write(fred,file=ifelse(dv=="vote.14.gen.pri","tablesAndFigs/TableS2.tex","tablesAndFigs/14Gen-TableS2.tex"))
rm(all.no.fes,all.wi.fes,all.wi.fes2,trt.no.fes,trt.wi.fes,regs.within2,trts.within);gc()

#
# Figure 2. Heterogeneous treatment effects by party.
#
het.effs <- makeHetEffs(regs) # all registrants.
het.effs <- rbindlist(list(het.effs,makeHetEffs(regs[party == "NPP",],group.name="NPP"))) # NPP
het.effs <- rbindlist(list(het.effs,makeHetEffs(regs[party == "DEM",],group.name="Dem"))) # Dems
het.effs <- rbindlist(list(het.effs,makeHetEffs(regs[party == "REP",],group.name="Rep"))) # Reps
het.effs <- rbindlist(list(het.effs,makeHetEffs(regs[party == "OTH",],group.name="Oth"))) # Oth
het.effs[,ub := coefs + 1.96*serr]
het.effs[,lb := coefs - 1.96*serr]
# Sorting variables.
het.effs[,group.fac := factor(group,levels=c("All","Rep","Dem","NPP","Oth"))]
setkeyv(het.effs,c("group.fac","lab"))

pdf(ifelse(dv=="vote.14.gen.pri","tablesAndFigs/Figure2.pdf","tablesAndFigs/14Gen-Figure2.pdf"),width=10,height=8)
plotTreatEffs(het.effs,leg.text=c("Any letter v control","Election info v control","Top-two info v control","Partisan v control"),leg.pch=c(18,19,17,15))
dev.off()

#
# Figure 3. Heterogeneous treatment effects by age and turnout.
#
het.effs2 <- makeHetEffs(regs) # all registrants.
# Determin age bins.
age.bins <- regs[,list(min.age=min(age.in.14,na.rm=T),max.age=max(age.in.14,na.rm=T)),by=age.bin]
# Create hetero effects within each age bin.
for (ab in seq(1,age.bins[,max(age.bin)])) { # Ignoring age.bin == 0, missing age.
  grp.name <- sprintf("Age\n%s to %s",age.bins[age.bin == ab,min.age],age.bins[age.bin == ab,max.age])
  if (ab == age.bins[,max(age.bin)]) grp.name <- sprintf("Age\n%s+",age.bins[age.bin == ab,min.age])
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[age.bin == ab,],group.name=grp.name)))
}
if (dv == "vote.14.gen") { # checking by different turnout history for general election.
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[vote.08.gen == 1,],group.name="Vote 2008\nGeneral")))
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[vote.10.gen == 1,],group.name="Vote 2010\nGeneral")))
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[vote.10.gen == 0,],group.name="No vote 2010\nGeneral")))
  vote.groups <- c("Vote 2008\nGeneral","Vote 2010\nGeneral","No vote 2010\nGeneral")
} else {
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[vote.08.gen == 1 | vote.10.gen == 1,],group.name="Voted 2008 or\n2010 General")))
  het.effs2 <- rbindlist(list(het.effs2,makeHetEffs(regs[vote.10.gen == 0 & vote.08.gen == 0,],group.name="No vote\n2008 or 2010")))
  vote.groups <- c("Voted 2008 or\n2010 General","No vote\n2008 or 2010")
}
het.effs2[,ub := coefs + 1.96*serr]
het.effs2[,lb := coefs - 1.96*serr]
# Sorting variables.
het.effs2[,group.fac := factor(group,levels=c("All",sort(unique(grep("Age",het.effs2[,group],value=T))),
                            vote.groups))]
setkeyv(het.effs2,c("group.fac","lab"))
pdf(ifelse(dv=="vote.14.gen.pri","tablesAndFigs/Figure3.pdf","tablesAndFigs/14Gen-Figure3.pdf"),width=11,height=7)
par.old <- par(cex.axis=.8)
plotTreatEffs(het.effs2,leg.text=c("Any letter v control","Election info v control","Top-two info v control","Partisan v control"),leg.pch=c(18,19,17,15))
par(par.old)
dev.off()

#
# Fig S1. Heterogeneous treatment effects by small
# age groupings.
#
ages <- data.frame(age.min=seq(19,79,by=4))
ages$age.max <- ages$age.min + 3
ages[nrow(ages),"age.max"] <- 99
ages$label <- with(ages,sprintf("%s\nto\n%s",age.min,age.max))
# First het effects for lowest age cat.
het.effs3 <- makeHetEffs(regs[age.in.14 >= ages[1,'age.min'] & age.in.14 <= ages[1,'age.max'],],group.name=ages[1,'label'])
# Het effects for subsequent age cats.
for (i in 2:nrow(ages)) {
  het.effs3 <- rbindlist(list(het.effs3,makeHetEffs(regs[age.in.14 >= ages[i,'age.min'] & age.in.14 <= ages[i,'age.max'],],group.name=ages[i,'label'])))
}
het.effs3[,ub := coefs + 1.96*serr]
het.effs3[,lb := coefs - 1.96*serr]
# Sorting variables.
het.effs3[,group.fac := factor(group)]
setkeyv(het.effs3,c("group.fac","lab"))
pdf(ifelse(dv=="vote.14.gen.pri","tablesAndFigs/FigureS1.pdf","tablesAndFigs/14Gen-FigureS1.pdf"),width=10,height=8)
plotTreatEffs(het.effs3,leg.text=c("Any letter v control","Election info v control","Top-two info v control","Partisan v control"),leg.pch=c(18,19,17,15))
title(xlab="Range of registrant age",line=3.5)
dev.off()

#
# Fig S2. Heterogeneous treatment effects by small
# age groupings cross party.
#
ages <- data.frame(age.min=seq(19,79,by=10))
ages$age.max <- ages$age.min + 9
ages[nrow(ages),"age.max"] <- 99
ages <- merge(ages,data.frame(party=c("DEM","REP","NPP")))
ages$label <- with(ages,sprintf("%s\n%s\nto\n%s",party,age.min,age.max))

# First het effects for lowest age cat.
het.effs4 <- makeHetEffs(regs[age.in.14 >= ages[1,'age.min'] & age.in.14 <= ages[1,'age.max'] & party == ages[1,'party'],],group.name=ages[1,'label'])
# Het effects for subsequent age cats.
for (i in 2:nrow(ages)) {
  het.effs4 <- rbindlist(list(het.effs4,makeHetEffs(regs[age.in.14 >= ages[i,'age.min'] & age.in.14 <= ages[i,'age.max'] & party == ages[i,'party'],],group.name=ages[i,'label'])))
}
het.effs4[,ub := coefs + 1.96*serr]
het.effs4[,lb := coefs - 1.96*serr]
# Sorting variables.
het.effs4[,group.fac := factor(group)]
setkeyv(het.effs4,c("group.fac","lab"))
pdf(ifelse(dv=="vote.14.gen.pri","tablesAndFigs/FigureS2.pdf","tablesAndFigs/14Gen-FigureS2.pdf"),width=10,height=8)
plotTreatEffs(het.effs4,leg.text=c("Any letter v control","Election info v control","Top-two info v control","Partisan v control"),leg.pch=c(18,19,17,15))
title(xlab="Registrant party and age",line=3.5)
dev.off()

#
# Table 2. Heterogeneous treatment effect
# regressions: by party, age, and past turnout.
#
# Basic direct effect model.
lm.no.fes.any <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ any.letter+ age.mean.dev + no.vote.08.10 + party.npp + party.rep + party.oth)]
het.lm.no.fes.any <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ any.letter*age.mean.dev + any.letter*no.vote.08.10 + any.letter*party.npp + any.letter*party.rep + any.letter*party.oth)]
het.lm.no.fes.any.age.binned <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ any.letter*factor(age.bin) + any.letter*no.vote.08.10 + any.letter*party.npp + any.letter*party.rep + any.letter*party.oth)]
het.lm.no.fes.any.age.all <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ any.letter*factor(age.bin) + any.letter*no.vote.08.10 + any.letter*party.npp + any.letter*party.rep + any.letter*party.oth + any.letter*reg.date.pre.08 + any.letter*reg.date.pre.10)]
lm.no.fes.sep <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ treat.elec.info + treat.toptwo.info + treat.partisan + age.mean.dev + no.vote.08.10 + party.npp + party.rep + party.oth)]
het.lm.no.fes.sep <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ treat.elec.info*age.mean.dev + treat.toptwo.info*age.mean.dev + treat.partisan*age.mean.dev + treat.elec.info*no.vote.08.10 + treat.toptwo.info*no.vote.08.10 + treat.partisan*no.vote.08.10 + treat.elec.info*party.npp + treat.toptwo.info*party.npp + treat.partisan*party.npp + treat.elec.info*party.rep + treat.toptwo.info*party.rep + treat.partisan*party.rep + treat.elec.info*party.oth + treat.toptwo.info*party.oth + treat.partisan*party.oth)]
het.lm.no.fes.sep.age.binned <- regs[age.na.too.big == 0,lm(I(100*yvar) ~ treat.elec.info*factor(age.bin) + treat.toptwo.info*factor(age.bin) + treat.partisan*factor(age.bin) + treat.elec.info*no.vote.08.10 + treat.toptwo.info*no.vote.08.10 + treat.partisan*no.vote.08.10 + treat.elec.info*party.npp + treat.toptwo.info*party.npp + treat.partisan*party.npp + treat.elec.info*party.rep + treat.toptwo.info*party.rep + treat.partisan*party.rep + treat.elec.info*party.oth + treat.toptwo.info*party.oth + treat.partisan*party.oth + treat.elec.info*reg.date.pre.08 + treat.toptwo.info*reg.date.pre.08 + treat.partisan*reg.date.pre.08 + treat.elec.info*reg.date.pre.10 + treat.toptwo.info*reg.date.pre.10 + treat.partisan*reg.date.pre.10)]
makeCoefNames <- function(lm.list,coef.labs=list("(Intercept)"="Intercept","any.letter"="Any letter","treat.elec.info"="Election info letter","treat.toptwo.info"="Top-two info letter","treat.partisan"="Partisan letter","age.mean.dev"="Age","no.vote.08.10"="Abstain 08 and 10","party.npp"="Party NPP","party.rep"="Party REP","party.oth"="Party OTH","factor(age.bin)2"="Age 29-38","factor(age.bin)3"="Age 39-48","factor(age.bin)4"="Age 49-58","factor(age.bin)5"="Age 59-68","factor(age.bin)6"="Age 69+","reg.date.pre.08"="Registered prior to 08","reg.date.pre.10"="Registered prior to 10")) {
  # Turn a list of lms into a coefficient name vector. Assumes coefs are listed
  # consistent with argument "lr" in apsrtable.
  coefs <- NULL
  for (i in 1:length(lm.list)) {
    coefs <- union(coefs,names(coef(lm.list[[i]])))
  }
  # Original names ordered appropriately in vector coefs. Now, new names.
  out <- NULL
  for (i in 1:length(coefs)) {
    if (coefs[i] %in% names(coef.labs)) { # exact name
      out <- c(out,coef.labs[[coefs[i]]])
    } else if (regexpr(":",coefs[i]) != -1) { # interaction term.
      splitter <- unlist(strsplit(coefs[i],":"))
      new.coef <- NULL
      for (j in 1:length(splitter)) { # see if each element has a match.
        if (splitter[j] %in% names(coef.labs)) {
          new.coef <- c(new.coef,coef.labs[[splitter[j]]])
        } else {
          new.coef <- c(new.coef,splitter[j])
        }
      }
      # Collapse with asterix.
      out <- c(out,paste(new.coef,collapse="*"))
    } else { # can't find match.
      out <- c(out,coefs[i])
    }
  }
  #print(cbind(coefs,out))
  return(out)
}

print(fred <- apsrtable(lm.no.fes.any,het.lm.no.fes.any,het.lm.no.fes.any.age.binned,het.lm.no.fes.any.age.all,
Sweave=T,omitcoef=expression(grep("block",coefnames)),digits=2,
model.names=c("(Direct effect)","(Age linear)","(Age binned)","(Age binned)"),coef.rows=1,order="lr"
,coef.names=makeCoefNames(list(lm.no.fes.any,het.lm.no.fes.any,het.lm.no.fes.any.age.binned,het.lm.no.fes.any.age.all))));flush.console()
write(fred,file=ifelse(dv=="vote.14.gen.pri","tablesAndFigs/Table2.tex","tablesAndFigs/14Gen-Table2.tex"))

#
# Table S3. Separate treatments: Heterogeneous
# treatment effect regressions: by party, age, and
# past turnout.
#
print(fred <- apsrtable(lm.no.fes.sep,het.lm.no.fes.sep,het.lm.no.fes.sep.age.binned,
                Sweave=T,omitcoef=expression(grep("block",coefnames)),digits=2,
                model.names=c("(Direct effect)","(Age linear)","(Age binned)"),coef.rows=1,
                order="lr"
                ,coef.names=makeCoefNames(list(lm.no.fes.sep,het.lm.no.fes.sep,het.lm.no.fes.sep.age.binned))
                ));flush.console()
write(fred,file=ifelse(dv=="vote.14.gen.pri","tablesAndFigs/TableS3-sep-letters.tex",                                              "tablesAndFigs/14Gen-TableS3-sep-letters.tex"))

                                             
#
# Reprint experiment results at end.
#
cat(paste(table.1,collapse="\n"));flush.console()
